TODO

  • separate sd_limits and tides into two functions, latter calls former. sd_limits useful for umbrella plot by itself without infill.
  • improve choice of algo using engineering text
  • speed up the umbrella plot by joining sd limits in rather than calculating it repeatedly for identical rows.
library(tidyverse)
library(janitor)
library(purrr)
library(scrutiny) 
library(readxl)
library(knitr)
library(kableExtra)

# Format a number as a character string with a fixed number of decimal
# places (default 2), e.g. min_decimals(1.5) -> "1.50".
min_decimals <- function(x, digits = 2) {
  fmt <- paste0("%.", digits, "f")
  sprintf(fmt, x)
}


# mean <- 17.50
# sd <- 12.40
# n_obs <- 35
# 
# min_val <- 0
# max_val <- 63

# tides <- function(n_obs, mean, sd, min_val, max_val, sd_prec = NULL, n_items = 1) {
#   
#   if(is.null(sd_prec)){
#     sd_prec <- max(nchar(sub("^[0-9]*", "", mean)) - 1, 0)
#   }
#   
#   result <- c(-Inf, Inf)
# 
#   aMax <- min_val
#   aMin <- floor(mean*n_items)/n_items
#   bMax <- min(max(max_val, min_val + 1, aMin + 1), max_val)   # Adjusted here
#   bMin <- min(aMin + 1/n_items, max_val)                      # Adjusted here
#   total <- round(mean * n_obs * n_items)/n_items
#   
#   poss_values <- max_val
#   for(i in seq_len(n_items)){
#     poss_values <- c(poss_values, min_val:(max_val-1) + (1 / n_items) * (i - 1))
#   }
#   poss_values <- sort(poss_values)
#   
#   for(abm in list(c(aMin, bMin, 1), c(aMax, bMax, 2))){
#     
#     a <- abm[1]
#     b <- abm[2]
#     m <- abm[3]
#     
#     # Adjust a and b to be within min_val and max_val
#     a <- min(max(a, min_val), max_val)
#     b <- min(max(b, min_val), max_val)
#    
#     if(a == b){
#       vec <- rep(a, n_obs)
#     } else {
#       k <- round((total - (n_obs * b)) / (a - b))
#       k <- min(max(k, 1), n_obs - 1)
#       vec <- c(rep(a, k), rep(b, n_obs - k))
#       diff <- sum(vec) - total
#       
#       if ((diff < 0)) {
#         vec <- c(rep(a, k - 1), a + abs(diff), rep(b, n_obs - k))
#       } else if ((diff > 0)) {
#         vec <- c(rep(a, k), b - diff, rep(b, n_obs - k - 1))
#       }
#     }
#     
#     # Check if the calculated mean and values match expected conditions
#     if(round(mean(vec), sd_prec) == round(mean, sd_prec) & 
#         all(floor(vec*10e9) %in% floor(poss_values*10e9))){
#       result[m] <- round(sd(vec), sd_prec)
#     }
#     
#   }
#   
#   # Replace Inf or -Inf with NA
#   result[is.infinite(result)] <- NA
#   
#   res <- 
#     data.frame(sd_min = result[1],
#                sd_max = result[2]) |>
#     mutate(tides = case_when(sd < sd_min ~ FALSE,
#                              is.na(sd_min) ~ FALSE,
#                              sd > sd_max ~ FALSE,
#                              is.na(sd_max) ~ FALSE,
#                              TRUE ~ TRUE))
#   
#   return(res)
# }

# tides <- function(n_obs, mean, sd, min_val, max_val, sd_prec = NULL, n_items = 1) {
#   
#   if(is.null(sd_prec)){
#     sd_prec <- max(nchar(sub("^[0-9]*", "", mean)) - 1, 0)
#   }
#   
#   result <- c(-Inf, Inf)
#   
#   aMax <- min_val
#   aMin <- floor(mean*n_items)/n_items
#   bMax <- min(max(max_val, min_val + 1, aMin + 1), max_val) 
#   bMin <- min(aMin + 1/n_items, max_val)                    
#   total <- round(mean * n_obs * n_items)/n_items
#   
#   poss_values <- max_val
#   for(i in seq_len(n_items)){
#     poss_values <- c(poss_values, min_val:(max_val-1) + (1 / n_items) * (i - 1))
#   }
#   poss_values <- sort(poss_values)
#   
#   for(abm in list(c(aMin, bMin, 1), c(aMax, bMax, 2))){
#     
#     a <- abm[1]
#     b <- abm[2]
#     m <- abm[3]
#     
#     # Adjust a and b to be within min_val and max_val
#     a <- min(max(a, min_val), max_val)
#     b <- min(max(b, min_val), max_val)
#     
#     if(a == b){
#       vec <- rep(a, n_obs)
#     } else {
#       k <- round((total - (n_obs * b)) / (a - b))
#       k <- min(max(k, 1), n_obs - 1)
#       vec <- c(rep(a, k), rep(b, n_obs - k))
#       diff <- sum(vec) - total
#       
#       if ((diff < 0)) {
#         vec <- c(rep(a, k - 1), a + abs(diff), rep(b, n_obs - k))
#       } else if ((diff > 0)) {
#         vec <- c(rep(a, k), b - diff, rep(b, n_obs - k - 1))
#       }
#     }
#     
#     # # Check if the calculated mean and values match expected conditions
#     # if(round(mean(vec), sd_prec) == round(mean, sd_prec) & 
#     #    all(floor(vec*10e9) %in% floor(poss_values*10e9))){
#     #   result[m] <- round(sd(vec), sd_prec)
#     # }
#     
#     tolerance <- 1e-6  # Adjust as appropriate
#     
#     if(abs(mean(vec) - mean) < tolerance & 
#        all(floor(vec * 1e9) %in% floor(poss_values * 1e9))){
#       result[m] <- round(sd(vec), sd_prec)
#     }
#     
#   }
#   
#   # Replace Inf or -Inf with NA
#   result[is.infinite(result)] <- NA
#   
#   # Create the data frame with the new columns
#   res <- data.frame(
#     sd_min = result[1],
#     sd_max = result[2]
#   ) |> mutate(
#     tides_range_calculable = !is.na(sd_min) & !is.na(sd_max),
#     tides_inside_ranges = ifelse(
#       tides_range_calculable,
#       sd >= sd_min & sd <= sd_max,
#       FALSE
#     ),
#     tides = tides_range_calculable & tides_inside_ranges
#   )
#   
#   return(res)
# }

# TIDES consistency check: for a bounded scale, construct the lowest- and
# highest-variance samples of size n_obs that are consistent with the reported
# (rounded) mean, and test whether the reported SD falls inside the resulting
# feasible range [sd_min, sd_max].
#
# Args:
#   n_obs            sample size
#   mean, sd         reported summary statistics
#   min_val, max_val scale bounds (inclusive)
#   sd_prec          decimal places for rounding comparisons; if NULL, inferred
#                    from the number of decimal digits in `mean`
#   n_items          number of scale items; per-person means can fall on a
#                    1/n_items grid
#
# Returns: a one-row tibble with sd_min/sd_max, flags for whether the SD range
# was calculable and whether mean/sd fall inside their feasible ranges, an
# overall `tides` flag, and POMP-rescaled mean/sd (pomp_m, pomp_sd).
tides <- function(n_obs, mean, sd, min_val, max_val, sd_prec = NULL, n_items = 1) {
  
  # Infer precision from `mean`: count characters after the decimal point
  # (e.g. 17.50 -> ".50" -> 2), floored at 0 for integer means.
  if(is.null(sd_prec)){
    sd_prec <- max(nchar(sub("^[0-9]*", "", mean)) - 1, 0)
  }
  
  # Custom rounding functions
  # Rounds halves away from zero (unlike base round()'s banker's rounding).
  round_half_up <- function(x, digits = 0) {
    posneg <- sign(x)
    z <- abs(x) * 10^digits + 0.5
    z <- trunc(z)
    z <- z / 10^digits
    z * posneg
  }
  
  # Rounds halves toward zero; together the two cover both plausible rounding
  # conventions a paper's authors might have used.
  round_half_down <- function(x, digits = 0) {
    posneg <- sign(x)
    z <- abs(x) * 10^digits - 0.5
    z <- trunc(z)
    z <- z / 10^digits
    z * posneg
  }
  
  # Function to check mean equality with different rounding methods
  # TRUE if the candidate sample mean matches the target mean under any
  # combination of half-up / half-down rounding at sd_prec decimals.
  mean_matches <- function(vec_mean, target_mean, sd_prec) {
    rounded_means_vec <- c(round_half_up(vec_mean, sd_prec), round_half_down(vec_mean, sd_prec))
    rounded_means_target <- c(round_half_up(target_mean, sd_prec), round_half_down(target_mean, sd_prec))
    any(rounded_means_vec %in% rounded_means_target)
  }
  
  # result[1] = minimum feasible SD, result[2] = maximum feasible SD;
  # sentinel +/-Inf is replaced by NA below when a bound can't be achieved.
  result <- c(-Inf, Inf)
  
  # Min-variance sample uses the two grid values bracketing the mean
  # (aMin/bMin); max-variance sample concentrates mass at the scale
  # endpoints (aMax/bMax).
  aMin <- floor(mean * n_items) / n_items
  aMax <- min_val
  
  bMin <- min(aMin + 1 / n_items, max_val)
  bMax <- min(max(max_val, min_val + 1, aMin + 1), max_val)
  
  # Target sum implied by the reported mean, snapped to the 1/n_items grid.
  total <- round(mean * n_obs * n_items) / n_items
  
  # All attainable individual scores on the 1/n_items grid.
  poss_values <- seq(min_val, max_val, by = 1 / n_items)
  
  # m = 1: minimum-SD construction; m = 2: maximum-SD construction.
  for(abm in list(c(aMin, bMin, 1), c(aMax, bMax, 2))){
    
    a <- abm[1]
    b <- abm[2]
    m <- abm[3]
    
    # Adjust a and b to be within min_val and max_val
    a <- min(max(a, min_val), max_val)
    b <- min(max(b, min_val), max_val)
    
    if(a == b){
      vec <- rep(a, n_obs)
    } else {
      # k = number of observations at value a so that the sum hits `total`.
      k <- round((total - (n_obs * b)) / (a - b))
      k <- min(max(k, 1), n_obs - 1)
      vec <- c(rep(a, k), rep(b, n_obs - k))
      diff <- sum(vec) - total
      
      # Absorb any remaining rounding slack into a single observation.
      if ((diff < 0)) {
        adjusted_value <- a + abs(diff)
        # NOTE(review): exact float `%in%` comparison; the acceptance test
        # below uses floor(x * 1e9) instead — confirm this is reliable when
        # n_items > 1 produces non-terminating binary fractions.
        if(!(adjusted_value %in% poss_values)){
          next  # Skip to next iteration if adjusted value is invalid
        }
        vec <- c(rep(a, k - 1), adjusted_value, rep(b, n_obs - k))
      } else if ((diff > 0)) {
        adjusted_value <- b - diff
        if(!(adjusted_value %in% poss_values)){
          next  # Skip to next iteration if adjusted value is invalid
        }
        vec <- c(rep(a, k), adjusted_value, rep(b, n_obs - k - 1))
      }
    }
    
    # Check if the calculated mean and values match expected conditions
    vec_mean <- mean(vec)
    
    # Accept the bound only if the constructed sample reproduces the reported
    # mean and every value lies on the attainable grid (compared at 1e-9
    # resolution to sidestep float noise).
    if(mean_matches(vec_mean, mean, sd_prec) &
       all(floor(vec * 1e9) %in% floor(poss_values * 1e9))){
      result[m] <- round(sd(vec), sd_prec)
    }
    
  }
  
  # Replace Inf or -Inf with NA
  result[is.infinite(result)] <- NA
  
  # Create the data frame with the new columns
  # When the SD range is not calculable, `tides_inside_ranges` falls back to
  # checking only that the mean is within the scale bounds; the overall
  # `tides` flag additionally requires the range to be calculable.
  res <- 
    tibble(sd_min = result[1],
           sd_max = result[2]) |> 
    mutate(tides_sd_range_calculable = !is.na(sd_min) & !is.na(sd_max),
           tides_inside_ranges = ifelse(tides_sd_range_calculable,
                                        mean >= min_val & mean <= max_val & sd >= sd_min & sd <= sd_max,
                                        mean >= min_val & mean <= max_val),
           tides = tides_sd_range_calculable & tides_inside_ranges) |>
    # pomp scores 
    mutate(pomp_m = (mean - min_val)/(max_val - min_val),
           pomp_sd = ifelse(!is.na(sd_min) & !is.na(sd_max), (sd - sd_min)/(sd_max - sd_min), NA))
  
  return(res)
}

Get data

HDRS-17 (17-item version): min 0, max score 52 (most common). HDRS-21 (21-item version): min 0, max score 64. HDRS-24 (24-item version): min 0, max score 74. HDRS-6 (6-item version): min 0, max score 22.

PHQ-9: 0 to 27

ces-d: 0 to 60

Edinburgh Postnatal Depression Scale (EPDS): 0 to 30

Geriatric Depression Scale (GDS): - GDS-30 0 to 30 - GDS-15 0 to 15 * more common apparently, but worse fit to this data

bdi-1 155
hdrs 150
bdi-2 138
phq-9 98
ces-d 78
epds 46
gds 36

701 of 925 rows covered by these scales

using all scales with 10+ uses would bring it to about 800 of 925

# Load the trial database and tidy it: clean column names, select/rename the
# summary-statistic columns, round means/SDs to 2 dp and Ns to integers
# (janitor::round_half_up), and harmonise instrument names.
dat <- read_xlsx("../data/The depression database - psy vs ctr.xlsx") |>
                 # col_types = c("text", # study 
                 #               "text", # CHANGES 
                 #               "text", # condition_arm1 
                 #               "text", # condition_arm2 
                 #               "text", # multi_arm1 
                 #               "text", # multi_arm2 
                 #               "numeric", # .g 
                 #               "numeric", # .g_se
                 #               "numeric", # primary_calc 
                 #               "text", # outcome_type 
                 #               "text", # instrument 
                 #               "text", # rating 
                 #               "text", # mean_arm1 
                 #               "text", # sd_arm1 
                 #               "text", # n_arm1 
                 #               "text", # mean_arm2 
                 #               "text", # sd_arm2 
                 #               "text", # n_arm2 
                 #               "text", # mean_change_arm1 
                 #               "text", # mean_change_arm2 
                 #               "text", # sd_change_arm1 
                 #               "text", # sd_change_arm2 
                 #               "text", # n_change_arm1 
                 #               "text", # n_change_arm2 
                 #               "text", # dich_paper 
                 #               "numeric", # event_arm1 
                 #               "numeric", # event_arm2 
                 #               "numeric", # totaln_arm1 
                 #               "numeric", # totaln_arm2 
                 #               "numeric", # precalc_g 
                 #               "numeric", # precalc_g_se 
                 #               "logical", # precalc_log_rr 
                 #               "logical", # precalc_log_rr_se 
                 #               "text", # other_stat 
                 #               "text", # baseline_m_arm1 
                 #               "text", # baseline_sd_arm1 
                 #               "text", # baseline_n_arm1 
                 #               "text", # baseline_m_arm2 
                 #               "text", # baseline_sd_arm2 
                 #               "text", # baseline_n_arm2 
                 #               "numeric", # rand_arm1 
                 #               "numeric", # rand_arm2 
                 #               "numeric", # attr_arm1 
                 #               "numeric", # attr_arm2 
                 #               "text", # rand_ratio 
                 #               "numeric", # year 
                 #               "text", # time_weeks
                 #               "text", # time_weeks_needs_cleaning 
                 #               "text", # time
                 #               "text", # comorbid_mental 
                 #               "text", # format 
                 #               "text", # format_details 
                 #               "text", # n_sessions_arm1 
                 #               "text", # country 
                 #               "text", # age_group 
                 #               "text", # mean_age 
                 #               "text", # percent_women 
                 #               "text", # recruitment 
                 #               "text", # diagnosis 
                 #               "text", # target_group 
                 #               "text", # sg 
                 #               "text", # ac 
                 #               "text", # ba 
                 #               "text", # itt 
                 #               "numeric", # rob 
                 #               "text", # adm 
                 #               "text", # notes 
                 #               "numeric", # outcome_rank 
                 #               "numeric", # primary 
                 #               "numeric", # .log_rr 
                 #               "numeric", # .log_rr_se 
                 #               "numeric", # .event_arm1 
                 #               "numeric", # .event_arm2
                 #               "numeric", # .totaln_arm1 
                 #               "numeric", # .totaln_arm2 
                 #               "text", # full_ref 
                 #               "text", # doi 
                 #               "text", # abstract 
                 #               "text", # title 
                 #               "text", # url 
                 #               "text", # journal 
                 #               "text", # id_study 
                 #               "text")) |> # .id
  janitor::clean_names() |>
  select(study, instrument, notes, 
         hedges_g = g,
         mean_age, percent_women,
         followup_m_1 = mean_arm1, 
         followup_sd_1 = sd_arm1, 
         followup_n_1 = n_arm1, 
         followup_m_2 = mean_arm2, 
         followup_sd_2 = sd_arm2, 
         followup_n_2 = n_arm2, 
         baseline_m_1 = baseline_m_arm1, 
         baseline_sd_1 = baseline_sd_arm1, 
         baseline_n_1 = baseline_n_arm1, 
         baseline_m_2 = baseline_m_arm2, 
         baseline_sd_2 = baseline_sd_arm2, 
         baseline_n_2 = baseline_n_arm2) |>
  # Pass extra arguments via an anonymous function rather than through
  # across()'s `...`, which is deprecated in dplyr >= 1.1.
  mutate(across(contains("_m_") | contains("_sd_"), \(x) round_half_up(x, digits = 2)),
         across(contains("_n_"), \(x) round_half_up(x, digits = 0))) |>
  # tidy up instrument names
  mutate(instrument = case_when(
    # for the sake of the below tests, bdi versions have the same scoring so can be treated equivalently
    instrument == "bdi-1" ~ "bdi",
    instrument == "bdi-2" ~ "bdi",
    instrument == "bdi-21" ~ "bdi",
    # seems like DASS hasn't been standardized. although there are 21 and 42 item versions, need to check these aren't apples with oranges *** TODO ***
    instrument == "dass-d" ~ "dass-d",
    instrument == "dass21-d" ~ "dass-d",
    instrument == "dass" ~ "dass-d",
    instrument == "DASS-21-D" ~ "dass-d",
    # some misspellings of "hdrs" or use of alternative acronyms
    instrument == "hrds-17" ~ "hdrs", # the modal length scale is the 17 item, so I recode this to hdrs on the assumption that's the 17 item.
    instrument == "hrsd-17" ~ "hdrs", # the modal length scale is the 17 item, so I recode this to hdrs on the assumption that's the 17 item.
    instrument == "hdrs-17" ~ "hdrs", # the modal length scale is the 17 item, so I recode this to hdrs on the assumption that's the 17 item.
    instrument == "ham-d-18" ~ "hdrs-18", # I'm not aware of an 18 item version
    instrument == "hamd21" ~ "hdrs-21",
    # I'll assume that these two versions of the HADS are equivalent
    instrument == "hads-d-severity" ~ "hads-d",
    # combined qids versions that are likely the same
    instrument == "qids-sr16" ~ "qids",
    instrument == "qids16sr" ~ "qids",
    # regardless of whether SR or clinician rated, these are scored the same and can be combined here
    instrument == "qids-c" ~ "qids",
    instrument == "qids-sr" ~ "qids",
    # regardless of whether SR or clinician rated, these are scored the same and can be combined here
    instrument == "madrs-s" ~ "madrs",
    instrument == "madrs-sr" ~ "madrs",
    instrument == "madrs-cr" ~ "madrs",
    TRUE ~ instrument)
  )

# Frequency of each instrument, most common first.
dat |>
  count(instrument, sort = TRUE)
## # A tibble: 58 × 2
##    instrument     n
##    <chr>      <int>
##  1 bdi          303
##  2 hdrs         159
##  3 phq-9         98
##  4 ces-d         78
##  5 epds          46
##  6 gds           36
##  7 madrs         26
##  8 hads-d        18
##  9 qids          17
## 10 mmpi-d        16
## # ℹ 48 more rows
# dat |>
#   count(instrument) |>
#   #pull(instrument)
#   arrange(desc(n)) |>
#   slice(1:10) |>
#   summarize(total = sum(n))

# top 10 = 797 / 925 = 86% of N
# top 13 (those used at least 10 times) = 834 / 925 = 90% of N

# The ten most frequently used instruments. mmpi-d is excluded because its
# t-score-based scoring can vary between studies.
# (A trailing dput() left over from debugging has been removed; it only
# printed the vector to the console and did not change the assigned value.)
top_ten_measures <- dat |>
  filter(instrument != "mmpi-d") |> # this is apparently scored using t score but can vary, exclude for the moment
  count(instrument) |>
  arrange(desc(n)) |>
  slice(1:10) |>
  pull(instrument)
## c("bdi", "hdrs", "phq-9", "ces-d", "epds", "gds", "madrs", "hads-d", 
## "qids", "scl")
# Attach scale bounds (0 to scale maximum) for the top-ten instruments and
# reshape to one row per timepoint x arm, keeping only rows with complete
# mean/sd/n. NA_real_ keeps the case_when() branches type-stable (plain
# logical NA mixed with doubles errors on older dplyr versions).
dat_for_grim <- dat |>
  mutate(min_val = case_when(instrument %in% top_ten_measures ~ 0,
                             TRUE ~ NA_real_),
         max_val = case_when(instrument == "bdi" ~ 63,
                             instrument == "hdrs" ~ 52, # assuming 17 item version
                             instrument == "phq-9" ~ 27,
                             instrument == "ces-d" ~ 60,
                             instrument == "epds" ~ 30,
                             instrument == "gds" ~ 30, # assuming 30 item not 15 item version
                             instrument == "madrs" ~ 60,
                             instrument == "hads-d" ~ 21,
                             instrument == "qids" ~ 27, # assuming 16 item version
                             instrument == "scl" ~ 360, # assuming 90 item version not 25 or 27
                             TRUE ~ NA_real_)) |>
  #filter(str_detect(tolower(instrument), "bdi")) |>
  #mutate(across(contains("_n_"), as.character)) |>
  pivot_longer(cols = c(starts_with("followup"), starts_with("baseline")),
               names_to = c("timepoint", ".value", "arm"),
               names_sep = "_") |>
  drop_na(m, sd, n)

#write_csv(dat_bdi, "../data/dat_bdi.csv")

Explore phq-9

# PHQ-9 rows in long format (one row per timepoint x arm) with the scale's
# bounds attached; rows missing any of m/sd/n are excluded.
dat_phq9 <- dat |>
  filter(instrument == "phq-9") |>
  mutate(min_val = 0,
         max_val = 27) |>
  pivot_longer(cols = c(starts_with("followup"), starts_with("baseline")),
               names_to = c("timepoint", ".value", "arm"),
               names_sep = "_") |>
  filter(!is.na(m), !is.na(sd), !is.na(n))

# Distribution of reported SDs.
ggplot(dat_phq9, aes(x = sd)) +
  geom_histogram(binwidth = 0.25, boundary = 0) +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 10)) # , limits = c(0, (27-1)/2)

# Distribution of reported means.
ggplot(dat_phq9, aes(x = m)) +
  geom_histogram(binwidth = 1, boundary = 0) +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 10)) # , limits = c(0, 27)

# Means split by arm and timepoint (follow up, between groups).
ggplot(dat_phq9, aes(x = m)) +
  geom_histogram(binwidth = 1, boundary = 0) +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 10)) + # , limits = c(0, 27)
  facet_grid(arm ~ timepoint)

# Between-arm mean differences at follow-up for PHQ-9, plus the overall mean
# and SD of those differences (constant columns used for the reference line).
dat_phq_diff <- dat |>
  filter(instrument == "phq-9") |>
  mutate(mean_diff_followup = followup_m_1 - followup_m_2) |>
  mutate(mean_mean_diff_followup = mean(mean_diff_followup, na.rm = TRUE),
         sd_mean_diff_followup = sd(mean_diff_followup, na.rm = TRUE))

ggplot(dat_phq_diff, aes(x = mean_diff_followup)) +
  geom_histogram(binwidth = 0.5, boundary = 0) +
  geom_vline(aes(xintercept = mean_mean_diff_followup)) +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 10))

Unstandardized mean difference at followup

# Between-arm mean differences at follow-up and baseline, for all instruments.
dat_diff <- dat |>
  mutate(mean_diff_followup = followup_m_1 - followup_m_2,
         mean_diff_baseline = baseline_m_1 - baseline_m_2) 

# Per-instrument mean/median/SD of the differences; instruments with fewer
# than 10 rows (NAs included in the count) are dropped.
dat_diff_summary <- dat_diff |>
  group_by(instrument) |>
  summarize(mean_mean_diff_followup = mean(mean_diff_followup, na.rm = TRUE),
            median_mean_diff_followup = median(mean_diff_followup, na.rm = TRUE),
            sd_mean_diff_followup = sd(mean_diff_followup, na.rm = TRUE),
            mean_mean_diff_baseline = mean(mean_diff_baseline, na.rm = TRUE),
            median_mean_diff_baseline = median(mean_diff_baseline, na.rm = TRUE),
            sd_mean_diff_baseline = sd(mean_diff_baseline, na.rm = TRUE),
            n = n()) |>
  filter(n >= 10) #including NAs

# Keep only rows whose instrument survived the n >= 10 filter above.
dat_diff_trimmed <- dat_diff |>
  semi_join(dat_diff_summary, by = "instrument")

# Follow-up differences per instrument, with mean (blue) and median (red)
# reference lines from the summary.
ggplot(dat_diff_trimmed, aes(mean_diff_followup)) +
  geom_histogram(boundary = 0, binwidth = 1) +
  geom_vline(data = dat_diff_summary, aes(xintercept = mean_mean_diff_followup), color = "blue", linetype = "dashed") + 
  geom_vline(data = dat_diff_summary, aes(xintercept = median_mean_diff_followup), color = "red", linetype = "dashed") + 
  scale_x_continuous(breaks = scales::breaks_pretty(n = 5)) +
  facet_wrap(~ instrument, scale = "free")

BDI issues:

  • Otu, 2023
  • Mukhtar, 2011
  • Eseadi, 2022
  • Eseadi, 2018
  • Victor-Aigbodion, 2023

hrds issues:

  • Chiang, 2015
  • Okeke, 2023

PHQ9:

  • Nasrin, 2017

CES-D issues:

  • Ede, 2020

Unstandardized mean difference at baseline

# Baseline mean differences per instrument, with mean (blue) and median (red)
# reference lines taken from the per-instrument summary.
ggplot(dat_diff_trimmed, aes(x = mean_diff_baseline)) +
  geom_histogram(binwidth = 1, boundary = 0) +
  geom_vline(data = dat_diff_summary, aes(xintercept = mean_mean_diff_baseline), linetype = "dashed", color = "blue") +
  geom_vline(data = dat_diff_summary, aes(xintercept = median_mean_diff_baseline), linetype = "dashed", color = "red") +
  scale_x_continuous(breaks = scales::breaks_pretty(n = 5)) +
  facet_wrap(~ instrument, scale = "free")

BDI issues

  • Kay-Lambkin, 2009

CES-D issues:

  • Afonso, 2009
  • Clark, 2003

PHQ-9 issues:

  • Abas, 2018
  • Noone, 2022

Studies where differences at baseline are large but at followup are small

Where the original study may have broken randomisation and controlled for baseline, ineffective interventions can be made to look effective.

# Baseline vs follow-up between-arm differences per instrument; dashed lines
# mark zero difference on each axis.
ggplot(dat_diff_trimmed, aes(x = mean_diff_baseline, y = mean_diff_followup)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_point() +
  facet_wrap(~ instrument, scale = "free")

Studies where differences at baseline are large but at followup are small

Studies inside the triangles to the left and right of the 0 have larger differences at baseline than followup and might be worth looking at.

# POMP (percentage of maximum possible) between-arm mean differences at
# baseline and follow-up, one row per study/instrument. NA_real_ keeps the
# case_when() branches type-stable (plain logical NA mixed with doubles
# errors on older dplyr versions), matching the dat_for_grim pipeline.
dat_pomp <- dat |>
  mutate(min_val = case_when(instrument %in% top_ten_measures ~ 0,
                             TRUE ~ NA_real_),
         max_val = case_when(instrument == "bdi" ~ 63,
                             instrument == "hdrs" ~ 52, # assuming 17 item version
                             instrument == "phq-9" ~ 27,
                             instrument == "ces-d" ~ 60,
                             instrument == "epds" ~ 30,
                             instrument == "gds" ~ 30, # assuming 30 item not 15 item version
                             instrument == "madrs" ~ 60,
                             instrument == "hads-d" ~ 21,
                             instrument == "qids" ~ 27, # assuming 16 item version
                             instrument == "scl" ~ 360, # assuming 90 item version not 25 or 27
                             TRUE ~ NA_real_)) |>
  # rescale each arm/timepoint mean to the 0-1 POMP metric
  mutate(pomp_mean_1_followup = (followup_m_1 - min_val)/(max_val - min_val),
         pomp_mean_2_followup = (followup_m_2 - min_val)/(max_val - min_val),
         pomp_mean_1_baseline = (baseline_m_1 - min_val)/(max_val - min_val),
         pomp_mean_2_baseline = (baseline_m_2 - min_val)/(max_val - min_val)) |>
  pivot_longer(cols = starts_with("pomp"),
               names_to = "arm_timepoint",
               values_to = "pomp_mean") |>
  separate(arm_timepoint, into = c("scoring", "original_metric", "arm", "timepoint")) |>
  select(-scoring, -original_metric) |>
  drop_na(pomp_mean) |>
  # arm 1 minus arm 2, then spread by timepoint
  pivot_wider(names_from = arm,
              names_prefix = "pomp_mean_condition_",
              values_from = pomp_mean) |>
  mutate(diff_pomp = pomp_mean_condition_1 - pomp_mean_condition_2) |>
  select(-pomp_mean_condition_1, -pomp_mean_condition_2) |>
  pivot_wider(names_from = timepoint,
              names_prefix = "pomp_mean_diff_",
              values_from = diff_pomp)


# POMP differences, baseline vs follow-up, pooled across instruments.
# Dashed lines at slopes +/-0.5 bound the regions of interest; dotted lines
# mark zero difference on each axis.
ggplot(dat_pomp, aes(x = pomp_mean_diff_baseline, y = pomp_mean_diff_followup, color = instrument)) +
  geom_abline(intercept = 0, slope = 0.5, linetype = "dashed") +
  geom_abline(intercept = 0, slope = -0.5, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  geom_point()

# The same relationship, faceted by instrument.
ggplot(dat_pomp, aes(x = pomp_mean_diff_baseline, y = pomp_mean_diff_followup)) +
  geom_abline(intercept = 0, slope = 0.5, linetype = "dashed") +
  geom_abline(intercept = 0, slope = -0.5, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  geom_point() +
  facet_wrap(~ instrument, scale = "free")

SD by condition and timepoint

# Follow-up rows only, long format: one row per timepoint x arm with
# complete m/sd/n.
dat_reshaped <- dat |>
  pivot_longer(cols = c(starts_with("followup"), starts_with("baseline")),
               names_to = c("timepoint", ".value", "arm"),
               names_sep = "_") |>
  drop_na(m, sd, n) |>
  filter(timepoint == "followup")

# SD summary per instrument x arm, restricted to cells with at least 10 rows.
dat_reshaped_summary <- dat_reshaped |>
  group_by(instrument, arm) |>
  summarize(mean_sd = mean(sd, na.rm = TRUE),
            median_sd = median(sd, na.rm = TRUE),
            sd_sd = sd(sd, na.rm = TRUE),
            n = n()) |>
  filter(n >= 10)

# NOTE(review): the summary is grouped by instrument AND arm, but the join
# keys on instrument only — an instrument is kept if ANY of its arms has
# n >= 10. Confirm this is intended (vs. by = c("instrument", "arm")).
dat_reshaped_trimmed <- dat_reshaped |>
  semi_join(dat_reshaped_summary, by = "instrument")

# SD distributions per instrument x arm, with mean (blue) and median (red)
# reference lines from the summary.
ggplot(dat_reshaped_trimmed, aes(sd)) +
  geom_histogram(boundary = 0, binwidth = 1) +
  geom_vline(data = dat_reshaped_summary, aes(xintercept = mean_sd), color = "blue", linetype = "dashed") + 
  geom_vline(data = dat_reshaped_summary, aes(xintercept = median_sd), color = "red", linetype = "dashed") + 
  scale_x_continuous(breaks = scales::breaks_pretty(n = 5)) +
  facet_wrap(~ instrument + arm, scale = "free")

BDI issues:

  • Eseadi, 2022
  • Otu, 2023
  • Zemestani, 2020b
  • Eseadi, 2018

PHQ9 issues:

  • Wright, 2022

CES-D issues:

  • Ede, 2020
  • Boele, 2021

GRIM, GRIMMER & TIDES

# Run GRIM, GRIMMER, and TIDES on every row of dat_for_grim. The scrutiny
# grim()/grimmer() functions take the mean/SD as character strings (to
# preserve trailing decimal zeros). tides() is wrapped in possibly() so a
# row that errors yields NA instead of aborting the pipeline.
res <- dat_for_grim |>
  mutate(m_prec  = 2, # hard coded by rounding above for the moment
         sd_prec = 2, # hard coded by rounding above for the moment
         n_items = 1) |>
  mutate(grim = pmap(list(as.character(m), n), grim)) |>
  unnest(grim) |>
  mutate(grimmer = pmap(list(as.character(m), as.character(sd), n), grimmer)) |>
  unnest(grimmer) |>
  mutate(tides = pmap(list(n, m, sd, min_val, max_val, sd_prec, n_items), possibly(tides, otherwise = NA))) |> 
  unnest(tides) |>
  # master variable
  # NA (tides errored / not computable) is treated as consistent — benefit
  # of the doubt. all_three is TRUE only when grim, grimmer, AND tides pass.
  mutate(tides = ifelse(is.na(tides), TRUE, tides)) |>
  mutate(all_three = ifelse(grim + grimmer + tides < 3, FALSE, TRUE))
  

# res |>
#   filter(timepoint == "followup") |>
#   group_by(study) |>
#   summarize(grim = as.logical(min(grim))) |>
#   count(grim) |>
#   kable(align = "r") |>
#   kable_classic(full_width = FALSE)
# 
# res |>
#   filter(timepoint == "followup") |>
#   group_by(study) |>
#   summarize(grimmer = as.logical(min(grimmer))) |>
#   count(grimmer) |>
#   kable(align = "r") |>
#   kable_classic(full_width = FALSE)
# 
# res |>
#   filter(timepoint == "followup") |>
#   group_by(study) |>
#   summarize(tides = as.logical(min(tides))) |>
#   count(tides) |>
#   kable(align = "r") |>
#   kable_classic(full_width = FALSE)
# 
# res |>
#   filter(timepoint == "followup") |>
#   group_by(study) |>
#   summarize(grim = as.logical(min(grim)),
#             grimmer = as.logical(min(grimmer)),
#             tides = as.logical(min(tides))) |>
#   count(grim, grimmer, tides) |>
#   kable(align = "r") |>
#   kable_classic(full_width = FALSE)
# 
# res |>
#   filter(timepoint == "followup") |>
#   group_by(study) |>
#   summarize(grim = as.logical(min(grim)),
#             grimmer = as.logical(min(grimmer)),
#             tides = as.logical(min(tides))) |>
#   count() |>
#   kable(align = "r") |>
#   kable_classic(full_width = FALSE)


# Cross-tabulation of GRIM x GRIMMER x TIDES outcomes across all reported
# statistics, with row percentages.
res |>
  count(grim, grimmer, tides) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
grim grimmer tides n percent
FALSE FALSE FALSE 103 3.1
FALSE FALSE TRUE 336 10.2
TRUE FALSE FALSE 1 0.0
TRUE FALSE TRUE 106 3.2
TRUE TRUE FALSE 247 7.5
TRUE TRUE TRUE 2511 76.0
# all tests, all stats
# TIDES pass/fail counts across all reported statistics.
res |>
  count(tides) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
tides n percent
FALSE 351 10.6
TRUE 2953 89.4
# tides range check, all stats
# The narrower range check only (NA where the SD range was not calculable).
res |>
  count(tides_inside_ranges) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
tides_inside_ranges n percent
FALSE 13 0.4
TRUE 2847 86.2
NA 444 13.4
# tides range check, RCT level
# Collapse to one row per study: min() over logicals is 0 if any row fails,
# and propagates NA if any row is NA (no na.rm).
res |>
  group_by(study) |>
  summarize(tides_inside_ranges = as.logical(min(tides_inside_ranges))) |>
  count(tides_inside_ranges) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
tides_inside_ranges n percent
FALSE 7 1.6
TRUE 356 82.0
NA 71 16.4
# Study-level cross-tab of GRIM x GRIMMER x TIDES-range. NA range checks are
# first recoded to TRUE (benefit of the doubt); a study fails a test if any
# of its rows fail (min over logicals).
res |>
  mutate(tides_inside_ranges_binary = ifelse(is.na(tides_inside_ranges), TRUE, tides_inside_ranges)) |>
  group_by(study) |>
  summarize(grim = as.logical(min(grim)),
            grimmer = as.logical(min(grimmer)),
            tides_inside_ranges_binary = as.logical(min(tides_inside_ranges_binary))) |>
  count(grim, grimmer, tides_inside_ranges_binary) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
grim grimmer tides_inside_ranges_binary n percent
FALSE FALSE FALSE 4 0.9
FALSE FALSE TRUE 133 30.6
TRUE FALSE TRUE 16 3.7
TRUE TRUE FALSE 3 0.7
TRUE TRUE TRUE 278 64.1
# tides, all stats
# Rows passing all three tests (GRIM, GRIMMER, TIDES) vs not.
res |>
  count(all_three) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
all_three n percent
FALSE 793 24
TRUE 2511 76
# all tests, follow up only
# Same as above but restricted to follow-up statistics.
res |>
  filter(timepoint == "followup") |>
  count(all_three) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
all_three n percent
FALSE 456 27.3
TRUE 1214 72.7
# all tests, all stats, article level
# One row per study; a study passes only if every one of its rows passes.
res |>
  group_by(study) |>
  summarize(all_three = as.logical(min(all_three))) |>
  count(all_three) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
all_three n percent
FALSE 217 50
TRUE 217 50
# all tests, follow up only, article level
# Study-level pass/fail using follow-up statistics only.
res |>
  filter(timepoint == "followup") |>
  group_by(study) |>
  summarize(all_three = as.logical(min(all_three))) |>
  count(all_three) |>
  mutate(percent = round_half_up(n/sum(n)*100, 1)) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
all_three n percent
FALSE 189 44.1
TRUE 240 55.9

Plot

# ggplot(res, aes(pomp_m, pomp_sd, color = all_tests)) +
#   geom_point(shape = 15, size = 2, alpha = 0.8) +
#   #geom_line() +
#   theme_linedraw() +
#   scale_color_viridis_d(begin = 0.3, end = 0.7) +
#   xlab("POMP Mean") +
#   ylab("POMP SD")
# 

# Raw mean vs SD per instrument, colored by the TIDES range check; rows where
# the check could not be computed are excluded.
res |>
  filter(!is.na(tides_inside_ranges)) |>
  ggplot(aes(x = m, y = sd, color = tides_inside_ranges)) +
  geom_point(alpha = 0.75) +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  xlab("Mean") +
  ylab("SD") +
  facet_wrap(~ instrument, scales = "free")

# Umbrella plot: POMP mean vs POMP SD pooled across instruments, colored by
# the TIDES range check. The grey rectangle marks the valid 0-1 POMP square.
res |>
  filter(!is.na(tides_inside_ranges)) |>
  ggplot(aes(x = pomp_m, y = pomp_sd, color = tides_inside_ranges)) +
  geom_rect(aes(xmin = 0, xmax = 1, ymin = 0, ymax = 1), fill = "grey90", color = NA) +
  geom_point(alpha = 0.5) +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  xlim(0, 1) + # NB: could cut off impossible results!
  xlab("POMP Mean") +
  ylab("POMP SD")

# Same umbrella plot as above, faceted by instrument.
res |>
  filter(!is.na(tides_inside_ranges)) |>
  ggplot(aes(x = pomp_m, y = pomp_sd, color = tides_inside_ranges)) +
  geom_rect(aes(xmin = 0, xmax = 1, ymin = 0, ymax = 1), fill = "grey90", color = NA) +
  geom_point(alpha = 0.5) +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  xlim(0, 1) + # NB: could cut off impossible results!
  xlab("POMP Mean") +
  ylab("POMP SD") +
  facet_wrap(~ instrument)